//do \\c3\rdat\SHoude\Research\EEgap\EEgap_scripts\Prep_Reg_Inattention_Match_Pair_vPaper_08072018.do 

// ---------------------------------------------------------------------------
// Session setup.
//   $pathname   : folder holding the sales, pair, electricity and rebate files
//   $censuspath : folder holding the zip-to-county mapping file
// ---------------------------------------------------------------------------
global pathname "raw_data_folder"
global censuspath "raw_data_folder"
set more off
pause on

// Positional arguments are currently disabled; the commented lines below show
// the values they were meant to accept.
//local pair_version = `"`1'"'
	/*
		j=4(1)6
		pairs_matched_`j'_size2only_wmiss_kwh
		pairs_matched_`j'_size2only_nomiss_kwh
		pairs_matched_`j'_size2only_impute_kwh
	*/
//local pid_pair_id = `"`2'"'	
	/*
	   j=4(1)6
	   pid_pair_`j'_wmiss_kwh
	   pid_pair_`j'_nomiss_kwh
	   pid_pair_`j'_impute_kwh
	*/	

// Sample definition: selects which availability file is merged further down
// and is embedded in the name of the saved output dataset.
local sample_dataset "day_zip_size2only"

	
/*
Structure of the file: the end goal is to run a linear probability model.
 First, we keep only the micro data that belong to the relevant pairs.
 
For each pair, we have the following regressors: weekly difference in price, weekly difference in rebates, estar, elec costs, reviews.
 
The outcome variable is pair_high, or equivalently pair_low = 1 - pair_high.

reg pair_low=constant + Regressors if dominated_week==1 & available week==1

We can also interact the regressors with demographics and other observables.

*/

//local sample_dataset="store_zip_restricted"
//local sample_dataset="zip_size2only"


// Build a zipcode-level file with the number of (store, zipcode) combinations
// observed in the sales data, excluding year 2007. It is merged back onto the
// pair data on zipcode.
use $pathname\lcidemo_046_2008_2012_allsales, clear

	drop if year==2007
	// Step 1: one row per (store, zipcode). The count of o_qty is only a
	// placeholder value so that each store contributes exactly one row.
	collapse (count) nb_store=o_qty, by(store zipcode)
	// Step 2: count those rows within each zipcode.
	collapse (count) nb_store, by(zipcode)
	sort zipcode
save $pathname\nb_store_zipcode, replace

// Load the sales micro data and keep only observations that
//   (1) belong to a manually checked matched pair,
//   (2) appear in the availability file for the chosen sample, and
//   (3) appear in the dominated-week file.
use $pathname\lcidemo_046_2008_2012_allsales, clear

	drop if year==2007

	// The two supported samples differ only in the availability file
	// (weekly vs daily) and in the keys used to merge it.
	local avail_unit ""
	local avail_keys ""
	if "`sample_dataset'"=="zip_size2only" {
		local avail_unit "Week"
		local avail_keys "pid_pair_2_wmiss_kwh zipcode week year"
	}
	if "`sample_dataset'"=="day_zip_size2only" {
		local avail_unit "Day"
		local avail_keys "pid_pair_2_wmiss_kwh zipcode datenum"
	}

	// Any other value of sample_dataset leaves the data unmerged.
	if "`avail_unit'"!="" {

		// (1) Attach pair identifiers to each product id.
		// NOTE(review): with -update replace- _merge can also take the values
		// 4 and 5 (matched rows whose values were updated); those rows are
		// dropped by the keep below. Confirm this is intended.
		sort pid
		merge pid using  $pathname\pairs_matched_2_size2only_manual_check_YQ_SH, update replace
		tabulate _merge
		keep if _merge==3
		drop _merge

		// (2) Keep pair/zipcode/period cells present in the availability file.
		sort `avail_keys'
		merge `avail_keys' using $pathname\Available_`avail_unit'_pairs_matched_2_size2only_manual_check_YQ_SH
		tabulate _merge
		keep if _merge==3
		drop _merge

		// (3) Keep cells present in the dominated-week file.
		sort pid_pair_2_wmiss_kwh zipcode datenum week year
		merge pid_pair_2_wmiss_kwh zipcode datenum week year using $pathname\Dominated_Week_pairs_matched_2_size2only_manual_check_YQ_SH
		tabulate _merge
		keep if _merge==3
		drop _merge

	}
			

		
//Create Regressors attributes

	// Running week index: week-of-year shifted by 52 for each year after 2008.
	gen week_num=week
	forvalues yr=2009/2012 {
		replace week_num=week_num+52*(`yr'-2008) if year==`yr'
	}

	// High-minus-low attribute gaps, constant within a pair. The value of the
	// high (pair_high==1) and low (pair_high==0) product is spread to every
	// row of the pair with egen max().
	foreach attr in kwh mef_rel {
		gen `attr'_high_tmp=`attr' if pair_high==1
		gen `attr'_low_tmp=`attr' if pair_high==0
		bysort pid_pair_2_wmiss_kwh: egen `attr'_high=max(`attr'_high_tmp)
		by pid_pair_2_wmiss_kwh: egen `attr'_low=max(`attr'_low_tmp)
		gen delta_`attr'=`attr'_high-`attr'_low
	}

/*	
	gen rank_high_tmp=sales_rank if pair_high==1
	gen rank_low_tmp=sales_rank if pair_high==0
	sort pid_pair_2_wmiss_kwh	
	by 	pid_pair_2_wmiss_kwh: egen rank_high=max(rank_high_tmp)
	by 	pid_pair_2_wmiss_kwh: egen rank_low=max(rank_low_tmp)
	gen delta_rank=rank_high-rank_low
*/	

	// Rebuild the certification indicator from estar.
	//   s_estar1: estar, forced to 1 when standard=="15"
	//   s_estar : s_estar1, forced to 0 when standard=="15" and week_num>17
	drop s_estar
//	gen s_estar1=s_estar2
	gen s_estar1=cond(standard=="15",1,estar)
	gen s_estar=cond(standard=="15" & week_num>17,0,s_estar1)

	// The certification gap varies over time, so it is computed within
	// pair-by-week cells rather than within pair.
	gen s_estar_high_tmp=s_estar if pair_high==1
	gen s_estar_low_tmp=s_estar if pair_high==0
	bysort pid_pair_2_wmiss_kwh week_num: egen s_estar_high=max(s_estar_high_tmp)
	by pid_pair_2_wmiss_kwh week_num: egen s_estar_low=max(s_estar_low_tmp)
	gen delta_s_estar=s_estar_high-s_estar_low

	
//Import electricity prices and rebates	
		// Exclude GU, PR, VI and DC.
		drop if inlist(state,"GU","PR","VI","DC")

	//Electricity Prices	
		// Map each zipcode to a county; unmatched mapping rows are discarded.
		sort zipcode
		merge zipcode using $censuspath\mapping_zip_county_nov99_sht
		tabulate _merge
		drop if _merge==2
		drop _merge
		//drop latitude longitude zip_class poname state county v8 v9
		rename county5 county_utility

		// County-by-year electricity prices.
		sort county_utility year
		//merge county_utility year using "$pathname\electricity\county_price_tmp"  
		merge county_utility year using "$pathname\electricity\county_elec_price_2007_2012"
		tabulate _merge
		drop if _merge==2
		drop _merge

		// Lagged county-by-year electricity prices.
		sort county_utility year
		merge county_utility year using "$pathname\electricity\county_elec_price_2007_2012_lag"
		tabulate _merge
		drop if _merge==2
		drop _merge

		// State-by-year electricity prices; list the states left unmatched.
		sort state year
		merge state year using $pathname\electricity\electricity_price_state_2007_2012
		tabulate _merge
		tabulate state if _merge==1
		drop if _merge==2
		drop _merge

		// Fall back on the state price when the county price is missing, then
		// rescale both prices by 1/100.
		// NOTE(review): presumably a cents-to-dollars conversion; confirm units.
		replace pcount=p_elec if pcount==.
		replace pcount=pcount/100
		replace p_elec=p_elec/100
  	
  	
  	
//Rebates: utility and cash for appliances
		// --- Utility rebates, matched by county, year and week -------------
		sort county_utility year week
		merge county_utility year week using "$pathname\rebate\DSIRE_rebate_week_county_2007_2013"  
		tab _m
		drop if _m==2
		drop _m
		// Rename so the name "incentive" is free for the Cash-for-Appliances
		// variable brought in by the next merge.
		ren incentive incentive_utility
		// Cells without a utility rebate get a zero incentive.
		mvencode incentive_utility,mv(0) over 
		//replace incentive_utility=incentive_utility/100
		
		// --- Cash for Appliances rebates, matched by state, year and week --
		sort week state year
		merge  week state  year using  "$pathname\rebate\Cash4Appliances\cash4appliance_refrigerators_weekly_vf"
		tab _m
		drop if _m==2	
		ren _m merge_rebate
		// No matching program row means no incentive.
		replace incentive=0 if merge_rebate==1
		//incentive is in ,00$
		//replace incentive=incentive/100
		
		// --- Cash for Appliances announcement week -------------------------
		// NOTE(review): unlike the merges above, rows found only in the
		// announcement file (merge_announce==2) are not dropped here; confirm
		// whether that is intended.
		sort state year week
		merge state year week using  "$pathname\rebate\Cash4Appliances\Cash4Appliances_announcement"
		tab _m
		ren _m merge_announce
		gen week_announce_tmp=week if merge_announce==3
		
		// Put the announcement week on the same running scale as week_num
		// (52 weeks added per year after 2008). The offsets must be applied to
		// the temporary variable BEFORE taking the state-level maximum: the
		// previous code applied them to week_announce, which does not exist
		// until the egen below, so the script stopped with "variable
		// week_announce not found".
		replace week_announce_tmp=week_announce_tmp+52 if year==2009
		replace week_announce_tmp=week_announce_tmp+104 if year==2010
		replace week_announce_tmp=week_announce_tmp+156 if year==2011
		replace week_announce_tmp=week_announce_tmp+208 if year==2012
		
		// Spread the announcement week to every observation of the state.
		by state, sort: egen week_announce=max(week_announce_tmp) 
	
		// Rebate and electricity-cost gaps between the high and low product:
		// incentives scaled by the certification gap, consumption gap valued
		// at state and county prices.
		gen delta_cfa=incentive*delta_s_estar	
		gen delta_rebate_utility=incentive_utility*delta_s_estar	
		gen delta_elec_cost_st=delta_kwh*p_elec
		gen delta_elec_cost_cty=delta_kwh*pcount
		
		
//Create demographics		
		destring age, replace
	//Create an indicator for households with single purchase (versus contractors). 	
		destring  hd_id, replace force
		bysort hd_id: egen nb_purchase=count(o_qty)
	    //For the year 2010 from August to December the o_qty is not recorded. I thus assume that this is 1. 
	    //gen Dhd=cond((o_qty==1 | o_qty==0 | o_qty==.) & (nb_purchase==1 | nb_purchase==0),1,0) & housing=="S" & rent=="O"  
		// 1 when the order quantity is 0, 1 or missing and the household has
		// at most one recorded purchase; 0 otherwise.
		gen Dhd=(o_qty==1 | o_qty==0 | o_qty==.) & (nb_purchase==1 | nb_purchase==0)
	    		
	
		// Political affiliation: 1 = "R", 2 = "D", 3 = anything else.
		gen political_id=3
		replace political_id=1 if political=="R"
		replace political_id=2 if political=="D"

		// Income group: 1 = income<=5, 2 = income in [6,7], 3 = other,
		// 0 = missing income.
		gen income_sub=3
		replace income_sub=2 if inrange(income,6,7)
		replace income_sub=1 if income<=5
		replace income_sub=0 if income==.
		//gen fam_size=adult+children

		// Children group: 1 = none, 2 = one or two, 3 = three or more,
		// 4 = any other value, 0 = missing.
		gen Dchildren=4
		replace Dchildren=3 if children>=3
		replace Dchildren=2 if children==1 | children==2
		replace Dchildren=1 if children==0
		replace Dchildren=0 if children==.

		// Adult group: 1 = one adult, 2 = two adults, 3 = other, 0 = missing.
		gen Dadult=3
		replace Dadult=1 if adult==1
		replace Dadult=2 if adult==2
		replace Dadult=0 if adult==.

		xtile quintile_age=age,nquantiles(5)
	
		// Indicator for a strictly positive utility rebate.
		gen Dinc_utility=incentive_utility>0
		//gen DC4A=(Tall_l4ma2m_inc>0)
		
		
		// Outcome: 1 when the purchased product is the low member of the pair
		// (pair_high==0), 0 otherwise.
		gen Pr_low_ee=(pair_high==0)
		
		// Attach the number of stores in the zipcode; keep matched rows only.
		sort zipcode
		merge zipcode using $pathname\nb_store_zipcode
		tabulate _merge
		keep if _merge==3
		drop _merge

save $pathname\pairs_matched_2_size2only_manual_check_YQ_SH_`sample_dataset'_reg_ready, replace
		
	